logo of company

Bioinformatics pipeline summary


Where we see the pipeline processes

Author: Adrien Taudière

Date: December 3, 2024

Code
library(knitr)
library(targets)
library(MiscMetabar)
library(patchwork)
here::i_am("analysis/01_bioinformatics.qmd")
source(here::here("R/styles.R"))
source(here::here("R/functions.R"))
Code
conflicted::conflicts_prefer(dplyr::filter)
[conflicted] Will prefer dplyr::filter over any other package.
Code
conflicted::conflicts_prefer(dplyr::rename)
[conflicted] Will prefer dplyr::rename over any other package.

Carbon footprint

Code
res_ga <- greenAlgoR::ga_targets(store = here::here("_targets/"))
library("emojifont")
ggvalue_box(
  round(res_ga$carbon_footprint_total_gCO2),
  "Carbon footprint (g)",
  icons = fontawesome(search_fontawesome("industry")[1]),
  alpha_icon = 0.7,
  colors = idest_pal$ligth_color_idest[[1]][[3]],
  color_icon = idest_pal$dark_color_idest[[1]][[5]],
  value_font_color = idest_pal$dark_color_idest[[1]][[5]],
  lab_font_color = idest_pal$dark_color_idest[[1]][[5]]
) +
  ggvalue_box(
    round(res_ga$energy_needed_kWh, 2),
    "Energy used (kWh)",
    icons = fontawesome(search_fontawesome("bolt")),
    alpha_icon = 0.7,
    colors = idest_pal$ligth_color_idest[[1]][[1]],
    color_icon = idest_pal$dark_color_idest[[1]][[2]],
    value_font_color = idest_pal$dark_color_idest[[1]][[2]],
    lab_font_color = idest_pal$dark_color_idest[[1]][[2]]
  )

Summary of the bioinformatic pipeline

Timeline and cpu usage

Code
log_data <- autometric::log_read(
  here::here("data/data_final/autometric_log.txt"),
  units_time = "hours",
  units_memory = "gigabytes"
)

res <- NULL
for(pid_char in as.character(rev(unique(log_data$pid)))) {
  print(pid_char)
  print(paste0("NROW=", nrow(res)))
  for (phase_char in unique(log_data$phase)) {
    print(phase_char)
    if (sum(res$phase == phase_char) == 0) {
      ld <- log_data |>
        filter(phase == phase_char) |>
        filter(pid == pid_char)
      print(paste0("nrow_ld:",nrow(ld)))
      res <- bind_rows(ld, res)
    } else {
      print(paste0("nrow=", nrow(res)))
    }
  }
}
[1] "10720"
[1] "NROW="
[1] "__DEFAULT__"
[1] "nrow_ld:12"
[1] "cutadapt"
[1] "nrow_ld:0"
[1] "quality_seq_wo_primers"
[1] "nrow_ld:0"
[1] "filtered"
[1] "nrow_ld:0"
[1] "prepare: derep_fs"
[1] "nrow_ld:0"
[1] "derep_fs"
[1] "nrow_ld:0"
[1] "conclude: derep_fs"
[1] "nrow_ld:0"
[1] "quality_seq_filtered_trimmed_FW"
[1] "nrow_ld:0"
[1] "quality_seq_filtered_trimmed_REV"
[1] "nrow_ld:0"
[1] "derep_rs"
[1] "nrow_ld:0"
[1] "conclude: derep_rs"
[1] "nrow_ld:0"
[1] "err_fs"
[1] "nrow_ld:0"
[1] "err_rs"
[1] "nrow_ld:0"
[1] "ddF"
[1] "nrow_ld:0"
[1] "conclude: ddF"
[1] "nrow_ld:0"
[1] "ddR"
[1] "nrow_ld:0"
[1] "conclude: ddR"
[1] "nrow_ld:0"
[1] "merged_seq"
[1] "nrow_ld:0"
[1] "seq_tab_Pairs"
[1] "nrow_ld:0"
[1] "conclude: seq_tab_Pairs"
[1] "nrow_ld:0"
[1] "seqtab_wo_chimera"
[1] "nrow_ld:0"
[1] "conclude: seqtab_wo_chimera"
[1] "nrow_ld:0"
[1] "tax_tab"
[1] "nrow_ld:0"
[1] "conclude: tax_tab"
[1] "nrow_ld:0"
[1] "conclude: data_phyloseq"
[1] "nrow_ld:0"
[1] "d_asv"
[1] "nrow_ld:0"
[1] "d_vs"
[1] "nrow_ld:0"
[1] "d_vs_mumu"
[1] "nrow_ld:0"
[1] "conclude: d_vs_mumu"
[1] "nrow_ld:0"
[1] "track_by_samples"
[1] "nrow_ld:0"
[1] "bioinfo_report"
[1] "nrow_ld:68"
[1] "prepare: track_sequences_samples_clusters"
[1] "nrow_ld:0"
[1] "track_sequences_samples_clusters"
[1] "nrow_ld:0"
[1] "fastq_files_folder"
[1] "nrow_ld:0"
[1] "quality_raw_seq"
[1] "nrow_ld:0"
[1] "data_phyloseq"
[1] "nrow_ld:0"
[1] "conclude: d_vs"
[1] "nrow_ld:0"
[1] "d_vs_mumu_rarefy"
[1] "nrow_ld:0"
[1] "conclude: d_vs_mumu_rarefy"
[1] "nrow_ld:0"
[1] "conclude: track_sequences_samples_clusters"
[1] "nrow_ld:0"
[1] "150107"
[1] "NROW=80"
[1] "__DEFAULT__"
[1] "nrow=80"
[1] "cutadapt"
[1] "nrow_ld:0"
[1] "quality_seq_wo_primers"
[1] "nrow_ld:0"
[1] "filtered"
[1] "nrow_ld:0"
[1] "prepare: derep_fs"
[1] "nrow_ld:0"
[1] "derep_fs"
[1] "nrow_ld:0"
[1] "conclude: derep_fs"
[1] "nrow_ld:0"
[1] "quality_seq_filtered_trimmed_FW"
[1] "nrow_ld:0"
[1] "quality_seq_filtered_trimmed_REV"
[1] "nrow_ld:0"
[1] "derep_rs"
[1] "nrow_ld:0"
[1] "conclude: derep_rs"
[1] "nrow_ld:0"
[1] "err_fs"
[1] "nrow_ld:0"
[1] "err_rs"
[1] "nrow_ld:0"
[1] "ddF"
[1] "nrow_ld:0"
[1] "conclude: ddF"
[1] "nrow_ld:0"
[1] "ddR"
[1] "nrow_ld:0"
[1] "conclude: ddR"
[1] "nrow_ld:0"
[1] "merged_seq"
[1] "nrow_ld:0"
[1] "seq_tab_Pairs"
[1] "nrow_ld:0"
[1] "conclude: seq_tab_Pairs"
[1] "nrow_ld:0"
[1] "seqtab_wo_chimera"
[1] "nrow_ld:0"
[1] "conclude: seqtab_wo_chimera"
[1] "nrow_ld:0"
[1] "tax_tab"
[1] "nrow_ld:0"
[1] "conclude: tax_tab"
[1] "nrow_ld:0"
[1] "conclude: data_phyloseq"
[1] "nrow_ld:0"
[1] "d_asv"
[1] "nrow_ld:0"
[1] "d_vs"
[1] "nrow_ld:0"
[1] "d_vs_mumu"
[1] "nrow_ld:0"
[1] "conclude: d_vs_mumu"
[1] "nrow_ld:0"
[1] "track_by_samples"
[1] "nrow_ld:0"
[1] "bioinfo_report"
[1] "nrow=80"
[1] "prepare: track_sequences_samples_clusters"
[1] "nrow_ld:0"
[1] "track_sequences_samples_clusters"
[1] "nrow_ld:0"
[1] "fastq_files_folder"
[1] "nrow_ld:0"
[1] "quality_raw_seq"
[1] "nrow_ld:0"
[1] "data_phyloseq"
[1] "nrow_ld:0"
[1] "conclude: d_vs"
[1] "nrow_ld:0"
[1] "d_vs_mumu_rarefy"
[1] "nrow_ld:0"
[1] "conclude: d_vs_mumu_rarefy"
[1] "nrow_ld:0"
[1] "conclude: track_sequences_samples_clusters"
[1] "nrow_ld:0"
[1] "77376"
[1] "NROW=80"
[1] "__DEFAULT__"
[1] "nrow=80"
[1] "cutadapt"
[1] "nrow_ld:350"
[1] "quality_seq_wo_primers"
[1] "nrow_ld:498"
[1] "filtered"
[1] "nrow_ld:509"
[1] "prepare: derep_fs"
[1] "nrow_ld:0"
[1] "derep_fs"
[1] "nrow_ld:160"
[1] "conclude: derep_fs"
[1] "nrow_ld:30"
[1] "quality_seq_filtered_trimmed_FW"
[1] "nrow_ld:242"
[1] "quality_seq_filtered_trimmed_REV"
[1] "nrow_ld:242"
[1] "derep_rs"
[1] "nrow_ld:173"
[1] "conclude: derep_rs"
[1] "nrow_ld:50"
[1] "err_fs"
[1] "nrow_ld:78"
[1] "err_rs"
[1] "nrow_ld:138"
[1] "ddF"
[1] "nrow_ld:1070"
[1] "conclude: ddF"
[1] "nrow_ld:2"
[1] "ddR"
[1] "nrow_ld:1195"
[1] "conclude: ddR"
[1] "nrow_ld:2"
[1] "merged_seq"
[1] "nrow_ld:117"
[1] "seq_tab_Pairs"
[1] "nrow_ld:0"
[1] "conclude: seq_tab_Pairs"
[1] "nrow_ld:1"
[1] "seqtab_wo_chimera"
[1] "nrow_ld:25"
[1] "conclude: seqtab_wo_chimera"
[1] "nrow_ld:1"
[1] "tax_tab"
[1] "nrow_ld:8945"
[1] "conclude: tax_tab"
[1] "nrow_ld:1"
[1] "conclude: data_phyloseq"
[1] "nrow_ld:0"
[1] "d_asv"
[1] "nrow_ld:6549"
[1] "d_vs"
[1] "nrow_ld:8"
[1] "d_vs_mumu"
[1] "nrow_ld:6"
[1] "conclude: d_vs_mumu"
[1] "nrow_ld:0"
[1] "track_by_samples"
[1] "nrow_ld:131"
[1] "bioinfo_report"
[1] "nrow=20603"
[1] "prepare: track_sequences_samples_clusters"
[1] "nrow_ld:0"
[1] "track_sequences_samples_clusters"
[1] "nrow_ld:97"
[1] "fastq_files_folder"
[1] "nrow_ld:5"
[1] "quality_raw_seq"
[1] "nrow_ld:525"
[1] "data_phyloseq"
[1] "nrow_ld:1"
[1] "conclude: d_vs"
[1] "nrow_ld:1"
[1] "d_vs_mumu_rarefy"
[1] "nrow_ld:1"
[1] "conclude: d_vs_mumu_rarefy"
[1] "nrow_ld:1"
[1] "conclude: track_sequences_samples_clusters"
[1] "nrow_ld:1"
[1] "90857"
[1] "NROW=21235"
[1] "__DEFAULT__"
[1] "nrow=21235"
[1] "cutadapt"
[1] "nrow=21235"
[1] "quality_seq_wo_primers"
[1] "nrow=21235"
[1] "filtered"
[1] "nrow=21235"
[1] "prepare: derep_fs"
[1] "nrow_ld:0"
[1] "derep_fs"
[1] "nrow=21235"
[1] "conclude: derep_fs"
[1] "nrow=21235"
[1] "quality_seq_filtered_trimmed_FW"
[1] "nrow=21235"
[1] "quality_seq_filtered_trimmed_REV"
[1] "nrow=21235"
[1] "derep_rs"
[1] "nrow=21235"
[1] "conclude: derep_rs"
[1] "nrow=21235"
[1] "err_fs"
[1] "nrow=21235"
[1] "err_rs"
[1] "nrow=21235"
[1] "ddF"
[1] "nrow=21235"
[1] "conclude: ddF"
[1] "nrow=21235"
[1] "ddR"
[1] "nrow=21235"
[1] "conclude: ddR"
[1] "nrow=21235"
[1] "merged_seq"
[1] "nrow=21235"
[1] "seq_tab_Pairs"
[1] "nrow_ld:0"
[1] "conclude: seq_tab_Pairs"
[1] "nrow=21235"
[1] "seqtab_wo_chimera"
[1] "nrow=21235"
[1] "conclude: seqtab_wo_chimera"
[1] "nrow=21235"
[1] "tax_tab"
[1] "nrow=21235"
[1] "conclude: tax_tab"
[1] "nrow=21235"
[1] "conclude: data_phyloseq"
[1] "nrow_ld:0"
[1] "d_asv"
[1] "nrow=21235"
[1] "d_vs"
[1] "nrow=21235"
[1] "d_vs_mumu"
[1] "nrow=21235"
[1] "conclude: d_vs_mumu"
[1] "nrow_ld:0"
[1] "track_by_samples"
[1] "nrow=21235"
[1] "bioinfo_report"
[1] "nrow=21235"
[1] "prepare: track_sequences_samples_clusters"
[1] "nrow_ld:4"
[1] "track_sequences_samples_clusters"
[1] "nrow=21239"
[1] "fastq_files_folder"
[1] "nrow=21239"
[1] "quality_raw_seq"
[1] "nrow=21239"
[1] "data_phyloseq"
[1] "nrow=21239"
[1] "conclude: d_vs"
[1] "nrow=21239"
[1] "d_vs_mumu_rarefy"
[1] "nrow=21239"
[1] "conclude: d_vs_mumu_rarefy"
[1] "nrow=21239"
[1] "conclude: track_sequences_samples_clusters"
[1] "nrow=21239"
[1] "75659"
[1] "NROW=21239"
[1] "__DEFAULT__"
[1] "nrow=21239"
[1] "cutadapt"
[1] "nrow=21239"
[1] "quality_seq_wo_primers"
[1] "nrow=21239"
[1] "filtered"
[1] "nrow=21239"
[1] "prepare: derep_fs"
[1] "nrow_ld:0"
[1] "derep_fs"
[1] "nrow=21239"
[1] "conclude: derep_fs"
[1] "nrow=21239"
[1] "quality_seq_filtered_trimmed_FW"
[1] "nrow=21239"
[1] "quality_seq_filtered_trimmed_REV"
[1] "nrow=21239"
[1] "derep_rs"
[1] "nrow=21239"
[1] "conclude: derep_rs"
[1] "nrow=21239"
[1] "err_fs"
[1] "nrow=21239"
[1] "err_rs"
[1] "nrow=21239"
[1] "ddF"
[1] "nrow=21239"
[1] "conclude: ddF"
[1] "nrow=21239"
[1] "ddR"
[1] "nrow=21239"
[1] "conclude: ddR"
[1] "nrow=21239"
[1] "merged_seq"
[1] "nrow=21239"
[1] "seq_tab_Pairs"
[1] "nrow_ld:0"
[1] "conclude: seq_tab_Pairs"
[1] "nrow=21239"
[1] "seqtab_wo_chimera"
[1] "nrow=21239"
[1] "conclude: seqtab_wo_chimera"
[1] "nrow=21239"
[1] "tax_tab"
[1] "nrow=21239"
[1] "conclude: tax_tab"
[1] "nrow=21239"
[1] "conclude: data_phyloseq"
[1] "nrow_ld:0"
[1] "d_asv"
[1] "nrow=21239"
[1] "d_vs"
[1] "nrow=21239"
[1] "d_vs_mumu"
[1] "nrow=21239"
[1] "conclude: d_vs_mumu"
[1] "nrow_ld:0"
[1] "track_by_samples"
[1] "nrow=21239"
[1] "bioinfo_report"
[1] "nrow=21239"
[1] "prepare: track_sequences_samples_clusters"
[1] "nrow=21239"
[1] "track_sequences_samples_clusters"
[1] "nrow=21239"
[1] "fastq_files_folder"
[1] "nrow=21239"
[1] "quality_raw_seq"
[1] "nrow=21239"
[1] "data_phyloseq"
[1] "nrow=21239"
[1] "conclude: d_vs"
[1] "nrow=21239"
[1] "d_vs_mumu_rarefy"
[1] "nrow=21239"
[1] "conclude: d_vs_mumu_rarefy"
[1] "nrow=21239"
[1] "conclude: track_sequences_samples_clusters"
[1] "nrow=21239"
[1] "170193"
[1] "NROW=21239"
[1] "__DEFAULT__"
[1] "nrow=21239"
[1] "cutadapt"
[1] "nrow=21239"
[1] "quality_seq_wo_primers"
[1] "nrow=21239"
[1] "filtered"
[1] "nrow=21239"
[1] "prepare: derep_fs"
[1] "nrow_ld:0"
[1] "derep_fs"
[1] "nrow=21239"
[1] "conclude: derep_fs"
[1] "nrow=21239"
[1] "quality_seq_filtered_trimmed_FW"
[1] "nrow=21239"
[1] "quality_seq_filtered_trimmed_REV"
[1] "nrow=21239"
[1] "derep_rs"
[1] "nrow=21239"
[1] "conclude: derep_rs"
[1] "nrow=21239"
[1] "err_fs"
[1] "nrow=21239"
[1] "err_rs"
[1] "nrow=21239"
[1] "ddF"
[1] "nrow=21239"
[1] "conclude: ddF"
[1] "nrow=21239"
[1] "ddR"
[1] "nrow=21239"
[1] "conclude: ddR"
[1] "nrow=21239"
[1] "merged_seq"
[1] "nrow=21239"
[1] "seq_tab_Pairs"
[1] "nrow_ld:1"
[1] "conclude: seq_tab_Pairs"
[1] "nrow=21240"
[1] "seqtab_wo_chimera"
[1] "nrow=21240"
[1] "conclude: seqtab_wo_chimera"
[1] "nrow=21240"
[1] "tax_tab"
[1] "nrow=21240"
[1] "conclude: tax_tab"
[1] "nrow=21240"
[1] "conclude: data_phyloseq"
[1] "nrow_ld:1"
[1] "d_asv"
[1] "nrow=21241"
[1] "d_vs"
[1] "nrow=21241"
[1] "d_vs_mumu"
[1] "nrow=21241"
[1] "conclude: d_vs_mumu"
[1] "nrow_ld:1"
[1] "track_by_samples"
[1] "nrow=21242"
[1] "bioinfo_report"
[1] "nrow=21242"
[1] "prepare: track_sequences_samples_clusters"
[1] "nrow=21242"
[1] "track_sequences_samples_clusters"
[1] "nrow=21242"
[1] "fastq_files_folder"
[1] "nrow=21242"
[1] "quality_raw_seq"
[1] "nrow=21242"
[1] "data_phyloseq"
[1] "nrow=21242"
[1] "conclude: d_vs"
[1] "nrow=21242"
[1] "d_vs_mumu_rarefy"
[1] "nrow=21242"
[1] "conclude: d_vs_mumu_rarefy"
[1] "nrow=21242"
[1] "conclude: track_sequences_samples_clusters"
[1] "nrow=21242"
[1] "145218"
[1] "NROW=21242"
[1] "__DEFAULT__"
[1] "nrow=21242"
[1] "cutadapt"
[1] "nrow=21242"
[1] "quality_seq_wo_primers"
[1] "nrow=21242"
[1] "filtered"
[1] "nrow=21242"
[1] "prepare: derep_fs"
[1] "nrow_ld:1"
[1] "derep_fs"
[1] "nrow=21243"
[1] "conclude: derep_fs"
[1] "nrow=21243"
[1] "quality_seq_filtered_trimmed_FW"
[1] "nrow=21243"
[1] "quality_seq_filtered_trimmed_REV"
[1] "nrow=21243"
[1] "derep_rs"
[1] "nrow=21243"
[1] "conclude: derep_rs"
[1] "nrow=21243"
[1] "err_fs"
[1] "nrow=21243"
[1] "err_rs"
[1] "nrow=21243"
[1] "ddF"
[1] "nrow=21243"
[1] "conclude: ddF"
[1] "nrow=21243"
[1] "ddR"
[1] "nrow=21243"
[1] "conclude: ddR"
[1] "nrow=21243"
[1] "merged_seq"
[1] "nrow=21243"
[1] "seq_tab_Pairs"
[1] "nrow=21243"
[1] "conclude: seq_tab_Pairs"
[1] "nrow=21243"
[1] "seqtab_wo_chimera"
[1] "nrow=21243"
[1] "conclude: seqtab_wo_chimera"
[1] "nrow=21243"
[1] "tax_tab"
[1] "nrow=21243"
[1] "conclude: tax_tab"
[1] "nrow=21243"
[1] "conclude: data_phyloseq"
[1] "nrow=21243"
[1] "d_asv"
[1] "nrow=21243"
[1] "d_vs"
[1] "nrow=21243"
[1] "d_vs_mumu"
[1] "nrow=21243"
[1] "conclude: d_vs_mumu"
[1] "nrow=21243"
[1] "track_by_samples"
[1] "nrow=21243"
[1] "bioinfo_report"
[1] "nrow=21243"
[1] "prepare: track_sequences_samples_clusters"
[1] "nrow=21243"
[1] "track_sequences_samples_clusters"
[1] "nrow=21243"
[1] "fastq_files_folder"
[1] "nrow=21243"
[1] "quality_raw_seq"
[1] "nrow=21243"
[1] "data_phyloseq"
[1] "nrow=21243"
[1] "conclude: d_vs"
[1] "nrow=21243"
[1] "d_vs_mumu_rarefy"
[1] "nrow=21243"
[1] "conclude: d_vs_mumu_rarefy"
[1] "nrow=21243"
[1] "conclude: track_sequences_samples_clusters"
[1] "nrow=21243"
Code
autom_p <- res |>
  filter(!grepl("conclude:", phase)) |>
  filter(!grepl("prepare:", phase)) |>
  filter(!grepl("__DEFAULT__", phase)) |>
  ggplot(aes(x = time, y = reorder(phase, dplyr::desc(time)), color = resident)) +
  geom_line(aes(linewidth = cpu)) +
  scale_color_viridis_b("Memory (Gb)", end = 0.9, direction = -1) +
  theme_idest() +
  xlab("Time (in hours)") +
  ylab("Targets")

autom_p + xlim(c(167, 174))

Code
data_2 <- autom_p$data |> 
  group_by(phase) |>
  summarise(time=min(time), resident = mean(resident))

autom_p + geom_point(data = data_2, shape=3) + xlim(c(167, 174))

Code
d_pq <- clean_pq(tar_read("d_vs", store = here::here("_targets/")))
Cleaning suppress 2 taxa and 12 samples.
Code
summary_plot_pq(d_pq)
Cleaning suppress 0 taxa and 0 samples.

Code
tar_glimpse(script = here::here("_targets.R"), 
            targets_only = TRUE, 
            callr_arguments = list(show = FALSE))
Code
tar_meta(store = here::here("_targets/"), targets_only = TRUE) |>
  dplyr::mutate(time = paste0(seconds %/% 3600, ":", seconds %/% 60, ":", floor(seconds %% 60))) |>
  dplyr::select(name, seconds, bytes, format, time) |>
  dplyr::mutate(Gb = round(bytes / 10^9, 2)) |>
  dplyr::arrange(desc(seconds), desc(bytes)) |>
  kable()
name seconds bytes format time Gb
tax_tab 8950.164 1291675 rds 2:149:10 0.00
d_asv 6549.617 1750186 rds 1:109:9 0.00
ddR 1195.932 119142011 qs 0:19:55 0.12
ddF 1070.724 105563116 qs 0:17:50 0.11
quality_raw_seq 524.808 18320 rds 0:8:44 0.00
filtered 509.565 182 rds 0:8:29 0.00
quality_seq_wo_primers 497.925 18660 rds 0:8:17 0.00
cutadapt 349.122 86016 file 0:5:49 0.00
quality_seq_filtered_trimmed_FW 240.991 11385 rds 0:4:0 0.00
quality_seq_filtered_trimmed_REV 240.823 11385 rds 0:4:0 0.00
derep_rs 173.771 1932493611 qs 0:2:53 1.93
derep_fs 159.974 1102690227 qs 0:2:39 1.10
err_rs 137.626 24539 qs 0:2:17 0.00
track_by_samples 131.940 9193 rds 0:2:11 0.00
merged_seq 116.616 2140786 qs 0:1:56 0.00
track_sequences_samples_clusters 97.023 444 rds 0:1:37 0.00
err_fs 77.500 20518 qs 0:1:17 0.00
bioinfo_report 67.736 44 rds 0:1:7 0.00
seqtab_wo_chimera 25.524 1287050 rds 0:0:25 0.00
d_vs 7.971 1421936 rds 0:0:7 0.00
d_vs_mumu 5.892 1393310 rds 0:0:5 0.00
d_vs_mumu_rarefy 0.872 1326445 rds 0:0:0 0.00
data_phyloseq 0.587 1619162 rds 0:0:0 0.00
seq_tab_Pairs 0.302 1595472 rds 0:0:0 0.00
s_d 0.046 10962 rds 0:0:0 0.00
asv_tab 0.040 1272832 rds 0:0:0 0.00
seqtab 0.025 1272901 rds 0:0:0 0.00
data_raw 0.021 5385 rds 0:0:0 0.00
sam_tab 0.003 3608 rds 0:0:0 0.00
file_sam_data_csv 0.001 7392 file 0:0:0 0.00
file_refseq_taxo 0.000 114270891 file 0:0:0 0.11
fastq_files_folder 0.000 49152 file 0:0:0 0.00
data_fnfs 0.000 2792 rds 0:0:0 0.00
data_fnrs 0.000 2791 rds 0:0:0 0.00
samp_n_otu_table 0.000 1848 rds 0:0:0 0.00

Load phyloseq object from targets store

Code
d_pq <- tar_read("d_vs", store = here::here("_targets/"))

The {targets} package is at the core of this project. Please read the intro of the user manual if you don’t know {targets}.

The {targets} package store … targets in a folder and can load (tar_load()) and read (tar_read) object from this folder.

Sample data

Code
DT::datatable(d_pq@sam_data)

Sequences, samples and clusters across the pipeline

Make krona files

Code
krona(clean_pq(d_pq, simplify_taxo = TRUE),
  paste0(here::here("data/data_final/"), "krona_unite.html"),
  name = "Unite"
)
Cleaning suppress 2 taxa and 12 samples.
Code
krona(clean_pq(d_pq, simplify_taxo = TRUE),
  ranks = 8:14,
  paste0(here::here("data/data_final/"), "krona_eukaryome.html"), name = "Eukaryome"
)
Cleaning suppress 2 taxa and 12 samples.
Code
merge_krona(c(
  paste0(here::here("data/data_final/"), "krona_unite.html"),
  paste0(here::here("data/data_final/"), "krona_eukaryome.html")
))
Code
krona(clean_pq(d_pq, simplify_taxo = TRUE),
  paste0(here::here("data/data_final/"), "krona_unite_taxa.html"),
  nb_seq = FALSE, name = "Unite"
)
Cleaning suppress 2 taxa and 12 samples.
Code
krona(clean_pq(d_pq, simplify_taxo = TRUE),
  ranks = 8:14,
  paste0(here::here("data/data_final/"), "krona_eukaryome_taxa.html"),
  nb_seq = FALSE, name = "Eukaryome"
)
Cleaning suppress 2 taxa and 12 samples.
Code
merge_krona(
  c(
    paste0(here::here("data/data_final/"), "krona_unite_taxa.html"),
    paste0(here::here("data/data_final/"), "krona_eukaryome_taxa.html")
  ),
  output = paste0(here::here("data/data_final/"), "mergeKrona_taxa.html")
)

Save phyloseq object in R data object

Code
tar_load(d_asv, store = here::here("_targets/"))
tar_load(d_vs, store = here::here("_targets/"))
tar_load(d_vs_mumu, store = here::here("_targets/"))
save(d_asv, d_vs, d_vs_mumu,
  file = paste0(here::here("data/data_final/"), "phyloseq_object.Rdata")
)

Save phyloseq object per project in R data object

Code
d_asv_Camila <- clean_pq(subset_samples(d_asv, Projet == "Camila"),
                         simplify_taxo = TRUE)
Cleaning suppress 14510 taxa and 0 samples.
Code
d_asv_DIVFUN <- clean_pq(subset_samples(d_asv, Projet == "DIVFUN"), 
                         simplify_taxo = TRUE)
Cleaning suppress 5742 taxa and 10 samples.
Code
d_asv_Cistaceae <- clean_pq(subset_samples(d_asv, Projet == "Cistaceae"), 
                            simplify_taxo = TRUE)
Cleaning suppress 10821 taxa and 0 samples.
Code
d_vs_Camila <- clean_pq(subset_samples(d_vs, Projet == "Camila"), 
                        simplify_taxo = TRUE)
Cleaning suppress 6286 taxa and 0 samples.
Code
d_vs_DIVFUN <- clean_pq(subset_samples(d_vs, Projet == "DIVFUN"), 
                        simplify_taxo = TRUE)
Cleaning suppress 1643 taxa and 10 samples.
Code
d_vs_Cistaceae <- clean_pq(subset_samples(d_vs, Projet == "Cistaceae"), 
                           simplify_taxo = TRUE)
Cleaning suppress 4631 taxa and 0 samples.
Code
dir.create(paste0(here::here("data/data_final/"), "Camila"))
Warning in dir.create(paste0(here::here("data/data_final/"), "Camila")):
'/home/adrien/Bureau/analyse_franck_camilla_jm_2024/bioinfo.starter/data/data_final/Camila'
existe déjà
Code
dir.create(paste0(here::here("data/data_final/"), "DIVFUN"))
Warning in dir.create(paste0(here::here("data/data_final/"), "DIVFUN")):
'/home/adrien/Bureau/analyse_franck_camilla_jm_2024/bioinfo.starter/data/data_final/DIVFUN'
existe déjà
Code
dir.create(paste0(here::here("data/data_final/"), "Cistaceae"))
Warning in dir.create(paste0(here::here("data/data_final/"), "Cistaceae")):
'/home/adrien/Bureau/analyse_franck_camilla_jm_2024/bioinfo.starter/data/data_final/Cistaceae'
existe déjà
Code
save_pq(d_vs_Camila,
  path = paste0(here::here("data/data_final/Camila/"), 
                "otu_reclustering_vsearch")
)
Cleaning suppress 0 taxa and 0 samples.
Joining with `by = join_by(samp_1_S1, samp_10_S10, samp_11_S11, samp_12_S12,
samp_13_S13, samp_14_S14, samp_15_S15, samp_16_S16, samp_17_S17, samp_18_S18,
samp_19_S19, samp_2_S2, samp_20_S20, samp_21_S21, samp_22_S22, samp_23_S23,
samp_24_S24, samp_25_S25, samp_26_S26, samp_27_S27, samp_28_S28, samp_29_S29,
samp_3_S3, samp_30_S30, samp_31_S31, samp_32_S32, samp_33_S33, samp_34_S34,
samp_35_S35, samp_4_S4, samp_41_S38, samp_42_S39, samp_43_S40, samp_44_S41,
samp_45_S42, samp_46_S43, samp_47_S44, samp_48_S45, samp_49_S46, samp_5_S5,
samp_50_S47, samp_51_S48, samp_52_S49, samp_53_S50, samp_54_S51, samp_55_S52,
samp_56_S53, samp_57_S54, samp_58_S55, samp_59_S56, samp_6_S6, samp_60_S57,
samp_61_S58, samp_62_S59, samp_63_S60, samp_64_S61, samp_65_S62, samp_66_S63,
samp_67_S64, samp_69_S66, samp_7_S7, samp_70_S67, samp_71_S68, samp_72_S69,
samp_73_S70, samp_74_S71, samp_75_S72, samp_8_S8, samp_9_S9)`
Cleaning suppress 0 taxa and 0 samples.
Code
save_pq(d_asv_Camila,
  path = paste0(here::here("data/data_final/Camila/"),
                "asv_dada2")
)
Cleaning suppress 0 taxa and 0 samples.
Joining with `by = join_by(samp_1_S1, samp_10_S10, samp_11_S11, samp_12_S12, samp_13_S13, samp_14_S14, samp_15_S15, samp_16_S16, samp_17_S17, samp_18_S18, samp_19_S19, samp_2_S2, samp_20_S20, samp_21_S21, samp_22_S22, samp_23_S23, samp_24_S24, samp_25_S25, samp_26_S26, samp_27_S27, samp_28_S28, samp_29_S29, samp_3_S3, samp_30_S30, samp_31_S31, samp_32_S32, samp_33_S33, samp_34_S34, samp_35_S35, samp_4_S4, samp_41_S38, samp_42_S39, samp_43_S40, samp_44_S41, samp_45_S42, samp_46_S43, samp_47_S44, samp_48_S45, samp_49_S46, samp_5_S5, samp_50_S47, samp_51_S48, samp_52_S49, samp_53_S50, samp_54_S51, samp_55_S52, samp_56_S53, samp_57_S54, samp_58_S55, samp_59_S56, samp_6_S6, samp_60_S57, samp_61_S58, samp_62_S59, samp_63_S60, samp_64_S61, samp_65_S62, samp_66_S63, samp_67_S64, samp_69_S66, samp_7_S7, samp_70_S67, samp_71_S68, samp_72_S69, samp_73_S70, samp_74_S71, samp_75_S72, samp_8_S8, samp_9_S9)`Cleaning suppress 0 taxa and 0 samples.
Code
save_pq(d_vs_DIVFUN,
  path = paste0(here::here("data/data_final/DIVFUN/"),
                "otu_reclustering_vsearch")
)
Cleaning suppress 0 taxa and 0 samples.
Joining with `by = join_by(`samp_C-1-3-1_S236`, `samp_C-1-3-2_S237`, `samp_C-1-3-3_S238`, `samp_C-1-3-4_S239`, `samp_C1-1_S234`, `samp_C1-2_S235`, `samp_C2-1_S241`, `samp_C2-2-1_S242`, `samp_C2-2-2_S243`, `samp_C2-2-3_S244`, `samp_C2-2-5_S246`, `samp_C3-1_S248`, `samp_C4-1_S255`, `samp_C4-2-1_S256`, `samp_C4-2-2_S257`, `samp_C4-2-3_S258`, `samp_C4-2-4_S259`, `samp_C4-3_S261`, `samp_C5-1_S262`, `samp_C5-2-1_S263`, `samp_C5-2-2_S264`, `samp_C5-2-3_S265`, `samp_C5-2-4_S266`, `samp_C5-2-5_S267`, `samp_C5-3_S268`, `samp_C6-1-1_S269`, `samp_C6-1-2_S270`, `samp_C6-1-3_S271`, `samp_C6-1-4_S272`, `samp_C6-1-5_S273`, `samp_C6-2_S274`, samp_CT_S232, `samp_CT1-PL2_S128`, samp_DOM1_S368, samp_DOM2_S369, `samp_LVT1-1_S341`, `samp_LVT1-2-1_S342`, `samp_LVT1-2-2_S343`, `samp_LVT1-2-3_S344`, `samp_LVT1-2-4_S345`, `samp_LVT1-2-5_S346`, `samp_LVT1-3-1_S347`, `samp_LVT1-3-2_S348`, `samp_LVT1-3-3_S349`, `samp_LVT1-3-4_S350`, `samp_LVT1-3-5_S351`, `samp_LVT2-1_S352`, `samp_LVT2-2_S353`, `samp_LVT2-3-1_S354`, `samp_LVT2-3-2_S355`, `samp_LVT2-3-3_S356`, `samp_LVT2-3-4_S357`, `samp_LVT2-3-5_S358`, `samp_LVT3-1_S359`, `samp_LVT3-2_S360`, `samp_LVT3-3_S361`, `samp_LVT4-1_S362`, `samp_LVT4-2_S363`, `samp_LVT4-3_S364`, `samp_LVT5-1_S365`, `samp_LVT5-2_S366`, `samp_LVT5-3_S367`, `samp_PC1-1_S276`, `samp_PC1-2-1_S277`, `samp_PC1-2-2_S278`, `samp_PC1-2-3_S279`, `samp_PC1-2-4_S280`, `samp_PC1-2-5_S281`, `samp_PC1-3_S282`, `samp_PC2-1-1_S283`, `samp_PC2-1-2_S284`, `samp_PC2-1-3_S285`, `samp_PC2-1-4_S286`, `samp_PC2-1-5_S287`, `samp_PC2-2_S288`, `samp_PC2-3_S289`, `samp_PC3-1-1_S290`, `samp_PC3-1-2_S291`, `samp_PC3-1-3_S292`, `samp_PC3-1-5_S294`, `samp_PC3-2_S295`, `samp_PC3-3_S296`, `samp_PC4-1_S297`, `samp_PC4-2-2_S299`, `samp_PC4-2-3_S300`, `samp_PC4-2-5_S302`, `samp_PC4-3_S303`, `samp_PC5-1-1_S304`, `samp_PC5-1-2_S305`, `samp_PC5-1-3_S306`, `samp_PC5-1-4_S307`, `samp_PC5-1-5_S308`, `samp_PC5-2_S309`, `samp_PC5-3_S310`, `samp_PRL1-1-1_S311`, `samp_PRL1-1-2_S312`, `samp_PRL1-1-3_S313`, `samp_PRL1-1-4_S314`, `samp_PRL1-1-5_S315`, `samp_PRL1-2_S316`, `samp_PRL1-3_S317`, `samp_PRL2-1-1_S318`, `samp_PRL2-1-2_S319`, `samp_PRL2-1-3_S320`, `samp_PRL2-1-4_S321`, `samp_PRL2-1-5_S322`, `samp_PRL2-2_S323`, `samp_PRL2-3-1_S324`, `samp_PRL2-3-2_S325`, `samp_PRL2-3-3_S326`, `samp_PRL2-3-4_S327`, `samp_PRL2-3-5_S328`, `samp_PRL3-2_S330`, `samp_PRL3-3_S331`, `samp_PRL3-4_S332`, `samp_PRL4-1_S333`, `samp_PRL4-2_S334`, `samp_PRL4-3_S335`, `samp_PRL5-2_S336`, `samp_PRL5-3_S337`, `samp_PRL6-1_S338`, `samp_PRL6-2_S339`, `samp_PRL6-3_S340`)`Cleaning suppress 0 taxa and 0 samples.
Code
save_pq(d_asv_DIVFUN,
  path = paste0(here::here("data/data_final/DIVFUN/"), 
                "asv_dada2")
)
Cleaning suppress 0 taxa and 0 samples.
Joining with `by = join_by(`samp_C-1-3-1_S236`, `samp_C-1-3-2_S237`, `samp_C-1-3-3_S238`, `samp_C-1-3-4_S239`, `samp_C1-1_S234`, `samp_C1-2_S235`, `samp_C2-1_S241`, `samp_C2-2-1_S242`, `samp_C2-2-2_S243`, `samp_C2-2-3_S244`, `samp_C2-2-5_S246`, `samp_C3-1_S248`, `samp_C4-1_S255`, `samp_C4-2-1_S256`, `samp_C4-2-2_S257`, `samp_C4-2-3_S258`, `samp_C4-2-4_S259`, `samp_C4-3_S261`, `samp_C5-1_S262`, `samp_C5-2-1_S263`, `samp_C5-2-2_S264`, `samp_C5-2-3_S265`, `samp_C5-2-4_S266`, `samp_C5-2-5_S267`, `samp_C5-3_S268`, `samp_C6-1-1_S269`, `samp_C6-1-2_S270`, `samp_C6-1-3_S271`, `samp_C6-1-4_S272`, `samp_C6-1-5_S273`, `samp_C6-2_S274`, samp_CT_S232, `samp_CT1-PL2_S128`, samp_DOM1_S368, samp_DOM2_S369, `samp_LVT1-1_S341`, `samp_LVT1-2-1_S342`, `samp_LVT1-2-2_S343`, `samp_LVT1-2-3_S344`, `samp_LVT1-2-4_S345`, `samp_LVT1-2-5_S346`, `samp_LVT1-3-1_S347`, `samp_LVT1-3-2_S348`, `samp_LVT1-3-3_S349`, `samp_LVT1-3-4_S350`, `samp_LVT1-3-5_S351`, `samp_LVT2-1_S352`, `samp_LVT2-2_S353`, `samp_LVT2-3-1_S354`, `samp_LVT2-3-2_S355`, `samp_LVT2-3-3_S356`, `samp_LVT2-3-4_S357`, `samp_LVT2-3-5_S358`, `samp_LVT3-1_S359`, `samp_LVT3-2_S360`, `samp_LVT3-3_S361`, `samp_LVT4-1_S362`, `samp_LVT4-2_S363`, `samp_LVT4-3_S364`, `samp_LVT5-1_S365`, `samp_LVT5-2_S366`, `samp_LVT5-3_S367`, `samp_PC1-1_S276`, `samp_PC1-2-1_S277`, `samp_PC1-2-2_S278`, `samp_PC1-2-3_S279`, `samp_PC1-2-4_S280`, `samp_PC1-2-5_S281`, `samp_PC1-3_S282`, `samp_PC2-1-1_S283`, `samp_PC2-1-2_S284`, `samp_PC2-1-3_S285`, `samp_PC2-1-4_S286`, `samp_PC2-1-5_S287`, `samp_PC2-2_S288`, `samp_PC2-3_S289`, `samp_PC3-1-1_S290`, `samp_PC3-1-2_S291`, `samp_PC3-1-3_S292`, `samp_PC3-1-5_S294`, `samp_PC3-2_S295`, `samp_PC3-3_S296`, `samp_PC4-1_S297`, `samp_PC4-2-2_S299`, `samp_PC4-2-3_S300`, `samp_PC4-2-5_S302`, `samp_PC4-3_S303`, `samp_PC5-1-1_S304`, `samp_PC5-1-2_S305`, `samp_PC5-1-3_S306`, `samp_PC5-1-4_S307`, `samp_PC5-1-5_S308`, `samp_PC5-2_S309`, `samp_PC5-3_S310`, `samp_PRL1-1-1_S311`, `samp_PRL1-1-2_S312`, `samp_PRL1-1-3_S313`, `samp_PRL1-1-4_S314`, `samp_PRL1-1-5_S315`, `samp_PRL1-2_S316`, `samp_PRL1-3_S317`, `samp_PRL2-1-1_S318`, `samp_PRL2-1-2_S319`, `samp_PRL2-1-3_S320`, `samp_PRL2-1-4_S321`, `samp_PRL2-1-5_S322`, `samp_PRL2-2_S323`, `samp_PRL2-3-1_S324`, `samp_PRL2-3-2_S325`, `samp_PRL2-3-3_S326`, `samp_PRL2-3-4_S327`, `samp_PRL2-3-5_S328`, `samp_PRL3-2_S330`, `samp_PRL3-3_S331`, `samp_PRL3-4_S332`, `samp_PRL4-1_S333`, `samp_PRL4-2_S334`, `samp_PRL4-3_S335`, `samp_PRL5-2_S336`, `samp_PRL5-3_S337`, `samp_PRL6-1_S338`, `samp_PRL6-2_S339`, `samp_PRL6-3_S340`)`Cleaning suppress 0 taxa and 0 samples.
Code
save_pq(d_vs_Cistaceae,
  path = paste0(here::here("data/data_final/Cistaceae/"), 
                "otu_reclustering_vsearch")
)
Cleaning suppress 0 taxa and 0 samples.
Joining with `by = join_by(samp_F1Fe1_S196, `samp_F1Fe1-PL2_S125`, samp_F1Fe2_S197, `samp_F1Fe2-PL2_S133`, `samp_F1Fe2b-PL2_S157`, samp_F1Fe3_S198, `samp_F1Fe3-PL2_S141`, samp_F1Ft1_S199, `samp_F1Ft1b-PL2_S149`, samp_F1Ft2_S200, samp_F1Ft3_S201, `samp_F1Ft3-PL2_S165`, samp_F2Fe1_S202, `samp_F2Fe1-PL2_S78`, samp_F2Fe2_S203, `samp_F2Fe2-PL2_S86`, samp_F2Fe3_S204, `samp_F2Fe3-PL2_S94`, samp_F2Ft1_S205, `samp_F2Ft1b-PL2_S102`, samp_F2Ft2_S206, `samp_F2Ft2b-PL2_S110`, samp_F2Ft3_S207, `samp_F2Ft3b-PL2_S118`, samp_F3Fe1_S208, `samp_F3Fe1-PL2_S126`, samp_F3Fe2_S209, `samp_F3Fe2-PL2_S134`, `samp_F3Fe2b-PL2_S158`, samp_F3Fe3_S210, `samp_F3Fe3-PL2_S142`, samp_F3Ft1_S211, `samp_F3Ft1b-PL2_S150`, samp_F3Ft2_S212, samp_F3Ft3_S213, `samp_F3Ft3-PL2_S166`, samp_P1Fe1_S214, `samp_P1Fe1-PL2_S79`, samp_P1Fe2_S215, `samp_P1Fe2-PL2_S87`, samp_P1Fe3_S216, `samp_P1Fe3-PL2_S95`, samp_P1Ft1_S217, `samp_P1Ft1b-PL2_S103`, samp_P1Ft2_S218, `samp_P1Ft2b-PL2_S111`, `samp_P1Ft3b-PL2_S119`, samp_P2Fe1_S220, `samp_P2Fe1-PL2_S127`, samp_P2Fe2_S221, `samp_P2Fe2-PL2_S135`, `samp_P2Fe2b-PL2_S159`, samp_P2Fe3_S222, samp_P2Ft1_S223, `samp_P2Ft1b-PL2_S151`, samp_P2Ft2_S224, samp_P2Ft3_S225, `samp_P2Ft3-PL2_S167`, samp_P3Fe1_S226, `samp_P3Fe1-PL2_S80`, samp_P3Fe2_S227, `samp_P3Fe2-PL2_S88`, samp_P3Fe3_S228, `samp_P3Fe3-PL2_S96`, samp_P3Ft1_S229, `samp_P3Ft1b-PL2_S104`, samp_P3Ft2_S230, `samp_P3Ft2b-PL2_S112`, samp_P3Ft3_S231, `samp_P3Ft3b-PL2_S120`, samp_T1Fe1_S172, `samp_T1Fe1-PL2_S121`, `samp_t1Fe1b-PL2_S145`, samp_T1Fe2_S173, `samp_T1Fe2-PL2_S129`, `samp_T1Fe2b-PL2_S153`, samp_T1Fe3_S174, `samp_t1Fe3-PL2_S161`, `samp_T1Fe3-PL2a_S137`, samp_T1Ft1_S175, `samp_T1Ft1-PL2_S74`, `samp_t1Ft1b-PL2_S98`, samp_T1Ft2_S176, `samp_T1Ft2-PL2_S82`, `samp_t1Ft2b-PL2_S106`, samp_T1Ft3_S177, `samp_T1Ft3-PL2_S90`, `samp_t1Ft3b-PL2_S114`, samp_T1Ho1_S169, `samp_T1Ho1-PL2_S73`, `samp_t1Ho1b-PL2_S97`, samp_T1Ho2_S170, `samp_T1Ho2-PL2_S81`, `samp_t1Ho2b-PL2_S105`, samp_T1Ho3_S171, `samp_T1Ho3-PL2_S89`, `samp_t1Ho3b-PL2_S113`, samp_T2Fe1_S181, `samp_T2Fe1-PL2_S75`, `samp_t2Fe1b-PL2_S99`, samp_T2Fe2_S182, `samp_T2Fe2-PL2_S83`, `samp_t2Fe2b-PL2_S107`, samp_T2Fe3_S183, `samp_T2Fe3-PL2_S91`, `samp_t2Fe3b-PL2_S115`, samp_T2Ft1_S184, `samp_T2Ft1-PL2_S123`, `samp_t2Ft1b-PL2_S147`, samp_T2Ft2_S185, `samp_T2Ft2-PL2_S131`, `samp_T2Ft2b-PL2_S155`, samp_T2Ft3_S186, `samp_t2Ft3-PL2_S163`, `samp_T2Ft3-PL2a_S139`, samp_T2Ho1_S178, `samp_T2Ho1-PL2_S122`, `samp_t2Ho1b-PL2_S146`, samp_T2Ho2_S179, `samp_T2Ho2-PL2_S130`, `samp_T2Ho2b-PL2_S154`, samp_T2Ho3_S180, `samp_t2Ho3-PL2_S162`, `samp_T2Ho3-PL2a_S138`, samp_T3Fe1_S190, `samp_T3Fe1-PL2_S124`, `samp_t3Fe1b-PL2_S148`, samp_T3Fe2_S191, `samp_T3Fe2-PL2_S132`, `samp_T3Fe2b-PL2_S156`, samp_T3Fe3_S192, `samp_T3Fe3-PL2a_S140`, samp_T3Ft1_S193, `samp_T3Ft1-PL2_S77`, samp_T3Ft2_S194, `samp_T3Ft2-PL2_S85`, `samp_t3Ft2b-PL2_S109`, samp_T3Ft3_S195, `samp_T3Ft3-PL2_S93`, `samp_t3Ft3b-PL2_S117`, samp_T3Ho1_S187, `samp_T3Ho1-PL2_S76`, samp_T3Ho2_S188, `samp_T3Ho2-PL2_S84`, `samp_t3Ho2b-PL2_S108`, samp_T3Ho3_S189, `samp_T3Ho3-PL2_S92`)`Cleaning suppress 0 taxa and 0 samples.
Code
save_pq(d_asv_Cistaceae,
  path = paste0(here::here("data/data_final/Cistaceae/"),
                "asv_dada2")
)
Cleaning suppress 0 taxa and 0 samples.
Joining with `by = join_by(samp_F1Fe1_S196, `samp_F1Fe1-PL2_S125`, samp_F1Fe2_S197, `samp_F1Fe2-PL2_S133`, `samp_F1Fe2b-PL2_S157`, samp_F1Fe3_S198, `samp_F1Fe3-PL2_S141`, samp_F1Ft1_S199, `samp_F1Ft1b-PL2_S149`, samp_F1Ft2_S200, samp_F1Ft3_S201, `samp_F1Ft3-PL2_S165`, samp_F2Fe1_S202, `samp_F2Fe1-PL2_S78`, samp_F2Fe2_S203, `samp_F2Fe2-PL2_S86`, samp_F2Fe3_S204, `samp_F2Fe3-PL2_S94`, samp_F2Ft1_S205, `samp_F2Ft1b-PL2_S102`, samp_F2Ft2_S206, `samp_F2Ft2b-PL2_S110`, samp_F2Ft3_S207, `samp_F2Ft3b-PL2_S118`, samp_F3Fe1_S208, `samp_F3Fe1-PL2_S126`, samp_F3Fe2_S209, `samp_F3Fe2-PL2_S134`, `samp_F3Fe2b-PL2_S158`, samp_F3Fe3_S210, `samp_F3Fe3-PL2_S142`, samp_F3Ft1_S211, `samp_F3Ft1b-PL2_S150`, samp_F3Ft2_S212, samp_F3Ft3_S213, `samp_F3Ft3-PL2_S166`, samp_P1Fe1_S214, `samp_P1Fe1-PL2_S79`, samp_P1Fe2_S215, `samp_P1Fe2-PL2_S87`, samp_P1Fe3_S216, `samp_P1Fe3-PL2_S95`, samp_P1Ft1_S217, `samp_P1Ft1b-PL2_S103`, samp_P1Ft2_S218, `samp_P1Ft2b-PL2_S111`, `samp_P1Ft3b-PL2_S119`, samp_P2Fe1_S220, `samp_P2Fe1-PL2_S127`, samp_P2Fe2_S221, `samp_P2Fe2-PL2_S135`, `samp_P2Fe2b-PL2_S159`, samp_P2Fe3_S222, samp_P2Ft1_S223, `samp_P2Ft1b-PL2_S151`, samp_P2Ft2_S224, samp_P2Ft3_S225, `samp_P2Ft3-PL2_S167`, samp_P3Fe1_S226, `samp_P3Fe1-PL2_S80`, samp_P3Fe2_S227, `samp_P3Fe2-PL2_S88`, samp_P3Fe3_S228, `samp_P3Fe3-PL2_S96`, samp_P3Ft1_S229, `samp_P3Ft1b-PL2_S104`, samp_P3Ft2_S230, `samp_P3Ft2b-PL2_S112`, samp_P3Ft3_S231, `samp_P3Ft3b-PL2_S120`, samp_T1Fe1_S172, `samp_T1Fe1-PL2_S121`, `samp_t1Fe1b-PL2_S145`, samp_T1Fe2_S173, `samp_T1Fe2-PL2_S129`, `samp_T1Fe2b-PL2_S153`, samp_T1Fe3_S174, `samp_t1Fe3-PL2_S161`, `samp_T1Fe3-PL2a_S137`, samp_T1Ft1_S175, `samp_T1Ft1-PL2_S74`, `samp_t1Ft1b-PL2_S98`, samp_T1Ft2_S176, `samp_T1Ft2-PL2_S82`, `samp_t1Ft2b-PL2_S106`, samp_T1Ft3_S177, `samp_T1Ft3-PL2_S90`, `samp_t1Ft3b-PL2_S114`, samp_T1Ho1_S169, `samp_T1Ho1-PL2_S73`, `samp_t1Ho1b-PL2_S97`, samp_T1Ho2_S170, `samp_T1Ho2-PL2_S81`, `samp_t1Ho2b-PL2_S105`, samp_T1Ho3_S171, `samp_T1Ho3-PL2_S89`, `samp_t1Ho3b-PL2_S113`, samp_T2Fe1_S181, `samp_T2Fe1-PL2_S75`, `samp_t2Fe1b-PL2_S99`, samp_T2Fe2_S182, `samp_T2Fe2-PL2_S83`, `samp_t2Fe2b-PL2_S107`, samp_T2Fe3_S183, `samp_T2Fe3-PL2_S91`, `samp_t2Fe3b-PL2_S115`, samp_T2Ft1_S184, `samp_T2Ft1-PL2_S123`, `samp_t2Ft1b-PL2_S147`, samp_T2Ft2_S185, `samp_T2Ft2-PL2_S131`, `samp_T2Ft2b-PL2_S155`, samp_T2Ft3_S186, `samp_t2Ft3-PL2_S163`, `samp_T2Ft3-PL2a_S139`, samp_T2Ho1_S178, `samp_T2Ho1-PL2_S122`, `samp_t2Ho1b-PL2_S146`, samp_T2Ho2_S179, `samp_T2Ho2-PL2_S130`, `samp_T2Ho2b-PL2_S154`, samp_T2Ho3_S180, `samp_t2Ho3-PL2_S162`, `samp_T2Ho3-PL2a_S138`, samp_T3Fe1_S190, `samp_T3Fe1-PL2_S124`, `samp_t3Fe1b-PL2_S148`, samp_T3Fe2_S191, `samp_T3Fe2-PL2_S132`, `samp_T3Fe2b-PL2_S156`, samp_T3Fe3_S192, `samp_T3Fe3-PL2a_S140`, samp_T3Ft1_S193, `samp_T3Ft1-PL2_S77`, samp_T3Ft2_S194, `samp_T3Ft2-PL2_S85`, `samp_t3Ft2b-PL2_S109`, samp_T3Ft3_S195, `samp_T3Ft3-PL2_S93`, `samp_t3Ft3b-PL2_S117`, samp_T3Ho1_S187, `samp_T3Ho1-PL2_S76`, samp_T3Ho2_S188, `samp_T3Ho2-PL2_S84`, `samp_t3Ho2b-PL2_S108`, samp_T3Ho3_S189, `samp_T3Ho3-PL2_S92`)`Cleaning suppress 0 taxa and 0 samples.

Krona for Camila

Code
path_camila <- here::here("data/data_final/Camila/")
krona(d_vs_Camila,
  paste0(path_camila, "krona_unite_nb_seq.html"),
  name = "Nb_seq_Unite",
  ranks=c(1:7)
)
krona(d_vs_Camila,
      nb_seq = FALSE,
  paste0(path_camila, "krona_unite_nb_asv.html"),
  name = "Nb_ASV_Unite",
  ranks=c(1:7)
)
krona(d_vs_Camila,
  paste0(path_camila, "krona_eukaryome_nb_seq.html"),
  name = "Nb_seq_Euk",
  ranks=c(8:14)
)
krona(d_vs_Camila,
      nb_seq = FALSE,
  paste0(path_camila, "krona_eukaryome_nb_asv.html"),
  name = "Nb_ASV_Euk",
  ranks=c(8:14)
)
merge_krona(
  c(
   paste0(path_camila, "krona_unite_nb_asv.html"),
   paste0(path_camila, "krona_eukaryome_nb_asv.html"),
   paste0(path_camila, "krona_unite_nb_seq.html"),
   paste0(path_camila, "krona_eukaryome_nb_seq.html")
  ),
  output = paste0(path_camila, "krona_unite_eukaryome.html")
)
unlink(paste0(path_camila, "krona_unite_nb_asv.html"))
unlink(paste0(path_camila, "krona_unite_nb_seq.html"))
unlink(paste0(path_camila, "krona_eukaryome_nb_asv.html"))
unlink(paste0(path_camila, "krona_eukaryome_nb_seq.html"))

Krona for DIVFUN

Code
path_divfun <- here::here("data/data_final/DIVFUN/")
krona(d_vs_DIVFUN,
  paste0(path_divfun, "krona_unite_nb_seq.html"),
  name = "Nb_seq_Unite",
  ranks=c(1:7)
)
krona(d_vs_DIVFUN,
      nb_seq = FALSE,
  paste0(path_divfun, "krona_unite_nb_asv.html"),
  name = "Nb_ASV_Unite",
  ranks=c(1:7)
)
krona(d_vs_DIVFUN,
  paste0(path_divfun, "krona_eukaryome_nb_seq.html"),
  name = "Nb_seq_Euk",
  ranks=c(8:14)
)
krona(d_vs_DIVFUN,
      nb_seq = FALSE,
  paste0(path_divfun, "krona_eukaryome_nb_asv.html"),
  name = "Nb_ASV_Euk",
  ranks=c(8:14)
)
merge_krona(
  c(
   paste0(path_divfun, "krona_unite_nb_asv.html"),
   paste0(path_divfun, "krona_eukaryome_nb_asv.html"),
   paste0(path_divfun, "krona_unite_nb_seq.html"),
   paste0(path_divfun, "krona_eukaryome_nb_seq.html")
  ),
  output = paste0(path_divfun, "krona_unite_eukaryome.html")
)
unlink(paste0(path_divfun, "krona_unite_nb_asv.html"))
unlink(paste0(path_divfun, "krona_unite_nb_seq.html"))
unlink(paste0(path_divfun, "krona_eukaryome_nb_asv.html"))
unlink(paste0(path_divfun, "krona_eukaryome_nb_seq.html"))

Krona for Cistaceae

Code
path_cistaceae <- here::here("data/data_final/Cistaceae/")
krona(d_vs_Cistaceae,
  paste0(path_cistaceae, "krona_unite_nb_seq.html"),
  name = "Nb_seq_Unite",
  ranks=c(1:7)
)
krona(d_vs_Cistaceae,
      nb_seq = FALSE,
  paste0(path_cistaceae, "krona_unite_nb_asv.html"),
  name = "Nb_ASV_Unite",
  ranks=c(1:7)
)
krona(d_vs_Cistaceae,
  paste0(path_cistaceae, "krona_eukaryome_nb_seq.html"),
  name = "Nb_seq_Euk",
  ranks=c(8:14)
)
krona(d_vs_Cistaceae,
      nb_seq = FALSE,
  paste0(path_cistaceae, "krona_eukaryome_nb_asv.html"),
  name = "Nb_ASV_Euk",
  ranks=c(8:14)
)
merge_krona(
  c(
   paste0(path_cistaceae, "krona_unite_nb_asv.html"),
   paste0(path_cistaceae, "krona_eukaryome_nb_asv.html"),
   paste0(path_cistaceae, "krona_unite_nb_seq.html"),
   paste0(path_cistaceae, "krona_eukaryome_nb_seq.html")
  ),
  output = paste0(path_cistaceae, "krona_unite_eukaryome.html")
)
unlink(paste0(path_cistaceae, "krona_unite_nb_asv.html"))
unlink(paste0(path_cistaceae, "krona_unite_nb_seq.html"))
unlink(paste0(path_cistaceae, "krona_eukaryome_nb_asv.html"))
unlink(paste0(path_cistaceae, "krona_eukaryome_nb_seq.html"))
Code
krona(subset_taxa(d_asv, Genus_Eukaryome=="Apiotrichum"), 
      ranks=c(1:7),
      nb_seq= FALSE,
      file = paste0(here::here("data/data_final/"),
                    "Apiotrichum_fromEUK_uniteTax.html")
      )

Session Information

Session information are detailed below. More information about the machine, the system, as well as python and R packages, are available in the file data/data_final/information_run.txt .

Code
sessionInfo()
R version 4.4.2 (2024-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Debian GNU/Linux 12 (bookworm)

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.11.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.11.0

locale:
 [1] LC_CTYPE=fr_FR.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=fr_FR.UTF-8        LC_COLLATE=fr_FR.UTF-8    
 [5] LC_MONETARY=fr_FR.UTF-8    LC_MESSAGES=fr_FR.UTF-8   
 [7] LC_PAPER=fr_FR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C       

time zone: Europe/Paris
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices datasets  utils     methods   base     

other attached packages:
 [1] emojifont_0.5.5    patchwork_1.3.0    MiscMetabar_0.10.1 purrr_1.0.2       
 [5] dplyr_1.1.4        dada2_1.34.0       Rcpp_1.0.13-1      ggplot2_3.5.1     
 [9] phyloseq_1.50.0    targets_1.9.0      knitr_1.49        

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3          rstudioapi_0.17.1          
  [3] jsonlite_1.8.9              magrittr_2.0.3             
  [5] farver_2.1.2                rmarkdown_2.29             
  [7] zlibbioc_1.52.0             vctrs_0.6.5                
  [9] multtest_2.62.0             memoise_2.0.1              
 [11] Rsamtools_2.22.0            benchmarkme_1.0.8          
 [13] htmltools_0.5.8.1           S4Arrays_1.6.0             
 [15] Rhdf5lib_1.28.0             SparseArray_1.6.0          
 [17] rhdf5_2.50.0                sass_0.4.9                 
 [19] bslib_0.8.0                 htmlwidgets_1.6.4          
 [21] plyr_1.8.9                  cachem_1.1.0               
 [23] conflicted_1.2.0            GenomicAlignments_1.42.0   
 [25] igraph_2.1.1                lifecycle_1.0.4            
 [27] iterators_1.0.14            pkgconfig_2.0.3            
 [29] Matrix_1.7-1                R6_2.5.1                   
 [31] fastmap_1.2.0               GenomeInfoDbData_1.2.13    
 [33] MatrixGenerics_1.18.0       digest_0.6.37              
 [35] showtext_0.9-7              colorspace_2.1-1           
 [37] ShortRead_1.64.0            S4Vectors_0.44.0           
 [39] ps_1.8.1                    rprojroot_2.0.4            
 [41] crosstalk_1.2.1             GenomicRanges_1.58.0       
 [43] base64url_1.4               hwriter_1.3.2.1            
 [45] vegan_2.6-8                 labeling_0.4.3             
 [47] fansi_1.0.6                 httr_1.4.7                 
 [49] abind_1.4-8                 mgcv_1.9-1                 
 [51] compiler_4.4.2              here_1.0.1                 
 [53] withr_3.0.2                 doParallel_1.0.17          
 [55] backports_1.5.0             BiocParallel_1.40.0        
 [57] MASS_7.3-61                 DelayedArray_0.32.0        
 [59] biomformat_1.34.0           permute_0.9-7              
 [61] tools_4.4.2                 ape_5.8                    
 [63] glue_1.8.0                  callr_3.7.6                
 [65] nlme_3.1-166                rhdf5filters_1.18.0        
 [67] grid_4.4.2                  cluster_2.1.6              
 [69] reshape2_1.4.4              ade4_1.7-22                
 [71] generics_0.1.3              gtable_0.3.6               
 [73] data.table_1.16.2           utf8_1.2.4                 
 [75] XVector_0.46.0              BiocGenerics_0.52.0        
 [77] foreach_1.5.2               pillar_1.9.0               
 [79] stringr_1.5.1               benchmarkmeData_1.0.4      
 [81] splines_4.4.2               lattice_0.22-6             
 [83] showtextdb_3.0              renv_1.0.11                
 [85] survival_3.7-0              deldir_2.0-4               
 [87] tidyselect_1.2.1            Biostrings_2.74.0          
 [89] IRanges_2.40.0              SummarizedExperiment_1.36.0
 [91] stats4_4.4.2                xfun_0.49                  
 [93] Biobase_2.66.0              matrixStats_1.4.1          
 [95] DT_0.33                     visNetwork_2.1.2           
 [97] proto_1.0.0                 stringi_1.8.4              
 [99] UCSC.utils_1.2.0            yaml_2.3.10                
[101] evaluate_1.0.1              codetools_0.2-20           
[103] interp_1.1-6                tibble_3.2.1               
[105] autometric_0.1.2            BiocManager_1.30.25        
[107] cli_3.6.3                   RcppParallel_5.1.9         
[109] secretbase_1.0.3            jquerylib_0.1.4            
[111] munsell_0.5.1               processx_3.8.4             
[113] GenomeInfoDb_1.42.0         greenAlgoR_0.1.1           
[115] png_0.1-8                   parallel_4.4.2             
[117] latticeExtra_0.6-30         jpeg_0.1-10                
[119] bitops_1.0-9                pwalign_1.2.0              
[121] viridisLite_0.4.2           scales_1.3.0               
[123] sysfonts_0.8.9              crayon_1.5.3               
[125] rlang_1.1.4                

Citation

BibTeX citation:
@online{taudière2024,
  author = {Taudière, Adrien},
  title = {Bioinformatics Pipeline Summary},
  date = {2024-12-03},
  langid = {en}
}
For attribution, please cite this work as:
Taudière, Adrien. 2024. “Bioinformatics Pipeline Summary.” December 3, 2024.